/***
This do-file creates the CPS national series by wage quartile and citizenship 
status (citizen from birth, naturalized, non-citizen) used in our 
analysis. We process the CPS data in the most analogous way possible to our 
processing in the employment pipeline, so we apply the same methodology to 
define the wage quartiles.
***/

*-------------------------------------------------------------------------------
* Set up
*-------------------------------------------------------------------------------

* Set $root 
* When no project build is running (r(buildrunning)==0) the interactive
* configuration file is included instead.
* NOTE(review): presumably config_interactive.do defines the root global for
* interactive sessions -- confirm.
project figstabs, root
if (r(buildrunning)==0) include "${root}/code/config_interactive.do"

* Set globals
* Register set_globals.do as a build dependency, then run it in this scope.
project, uses("${root}/code/set_globals.do")
include "${root}/code/set_globals.do"
* NOTE(review): the local macro category is not referenced again in the
* visible part of this do-file.
local category "Employment"

* Create directories
* capture swallows any error from mkdir (for example, the folder already exists).
cap mkdir "${root}/data/derived/CPS"

*-------------------------------------------------------------------------------
* 1 - Get multipliers
*-------------------------------------------------------------------------------

* Open Thresholds
project, uses("${root}/data/dvc/Employment/poverty_thresholds.dta")
use "${root}/data/dvc/Employment/poverty_thresholds.dta", clear 

* Get multipliers
* For a threshold within half a dollar of a round-dollar wage, the multiplier is
* the signed share of the mass at that round wage to move across the threshold
* when the mass is treated as spread evenly over [level - 0.5, level + 0.5]:
*   - threshold in [level - 0.5, level)   : threshold - (level - 0.5)
*   - threshold in [level, level + 0.5]   : the same quantity minus 1
* Thresholds further than half a dollar from every listed level keep a missing
* multiplier.
cap drop multiplier 
foreach poverty in 100 150 250 {

	* Create variable 
	gen multiplier_`poverty' = .

	* Round-dollar wages to smooth around for this threshold. The list is reset
	* on every pass so that a threshold without a grid of its own (250) does
	* not silently reuse the grid left over from the previous pass; its
	* multiplier simply stays missing.
	local levels ""
	if `poverty' == 100 local levels "13 14 15 16"
	else if `poverty' == 150 local levels "19 20 21 22 23"

	foreach level of local levels {
		replace multiplier_`poverty' = poverty_`poverty' - (`level' - 0.5) if inrange(poverty_`poverty', `level' - 0.5, `level' + 0.5)
		replace multiplier_`poverty' = multiplier_`poverty' - 1 if inrange(poverty_`poverty', `level', `level' + 0.5)
	}
}

* Park the thresholds plus multipliers for the merge after the collapse
tempfile multiplier
save `multiplier'

*-------------------------------------------------------------------------------
* 2 - Import CPS and apply sample restrictions
*-------------------------------------------------------------------------------

* Load the CPS extract (registered as a build dependency first)
project, uses("${root}/data/dvc/CPS/cps_00037.dta")
use "${root}/data/dvc/CPS/cps_00037.dta", clear 

* Age and year must both be populated before filtering on them
assert !missing(age) & !missing(year)

* Restrict to respondents aged 16 or older observed after 2019
keep if age >= 16 & year > 2019

* Derive the two-digit sector code stored in naics from the industry code ind.
* Each triplet below reads: sector, lowest ind code, highest ind code (both
* bounds inclusive). Industry codes outside every range keep a missing sector.
gen naics = . 
local sector_map        ///
	11   170   290      ///
	21   370   490      ///
	23   770   770      ///
	31  1070  1790      ///
	32  1870  2590      ///
	33  2670  3990      ///
	42  4070  4590      ///
	44  4670  5190      ///
	45  5275  5790      ///
	48  6070  6290      ///
	49  6370  6390      ///
	22   570   690      ///
	51  6470  6780      ///
	52  6870  6992      ///
	53  7070  7190      ///
	54  7270  7490      ///
	55  7570  7570      ///
	56  7580  7790      ///
	61  7860  7890      ///
	62  7970  8470      ///
	71  8560  8590      ///
	72  8660  8690      ///
	81  8770  9290      ///
	92  9370  9890

* Walk the table three tokens at a time and apply one replace per triplet
local n_tokens : word count `sector_map'
forvalues first = 1(3)`n_tokens' {
	local sector  : word `first' of `sector_map'
	local ind_min : word `=`first' + 1' of `sector_map'
	local ind_max : word `=`first' + 2' of `sector_map'
	replace naics = `sector' if inrange(ind, `ind_min', `ind_max')
}

* Keep the derived sector next to its source variable
order naics, after(ind)

* Industry exclusions
drop if naics == 92 // drop those working in public sector to match CES (Total Private Employment)
drop if naics == 11 // drop those working in agriculture, forestry, fishing, and hunting according to BLS adjustment of CPS to CES
* Private households are a single industry code (ind 9290) that was folded into
* sector 81 above, so they have to be identified on ind. The sector variable
* naics only ever holds two-digit values and can never equal 9290.
drop if ind == 9290 // drop workers in private households such as nannies, housekeepers, etc.

* Class-of-worker exclusions
drop if inlist(classwkr, 0, 13, 25, 26, 27, 28, 29) // drop missing (0), unincorporated, self-employed (13), and all public sector employees (25-29) 

** Keep those with jobs **
* NOTE(review): only empstat code 10 is retained -- confirm no other employed code should be kept.
keep if empstat == 10 

** Recode sentinel values to missing **
replace uhrsworkorg = . if inlist(uhrsworkorg, 998, 999)
replace ahrsworkt = . if ahrsworkt == 999
replace hourwage = . if hourwage == 999.99
replace earnweek = . if earnweek == 9999.99

** Derive an hourly wage from weekly earnings **
* paidhour == 2: fill in only where no hourly wage is recorded, dividing weekly
* earnings by uhrsworkorg
replace hourwage = earnweek / uhrsworkorg if paidhour == 2 & mi(hourwage)
* paidhour == 1: always recomputed, dividing weekly earnings by ahrsworkt
* NOTE(review): presumably paidhour 2 means paid by the hour and 1 means not,
* and ahrsworkt is hours actually worked rather than usual hours -- confirm
* against the codebook that this divisor is the intended one.
replace hourwage = earnweek / ahrsworkt if paidhour == 1

* Count the records that end up with an hourly wage
count if !missing(hourwage)

* Carry the hourly wage under a shorter name from here on
rename hourwage wage

* Daily date pinned to the 15th of the survey month
gen date = mdy(month, 15, year)
format %td date

* Create citizenship indicator (any other citizen code is left missing)
gen cit_status = .
replace cit_status = 3 if citizen == 5
replace cit_status = 2 if citizen == 4
replace cit_status = 1 if inrange(citizen, 1, 3)

* Attach value labels to the three groups
label define labels 1 "Citizen at Birth" 2 "Naturalized Citizen" 3 "Non-citizens"
label values cit_status labels

*-------------------------------------------------------------------------------
* 3 - Merge poverty thresholds 
*-------------------------------------------------------------------------------

* Every CPS record must find its date in the thresholds file; threshold dates
* without any CPS record are discarded.
project, uses("${root}/data/dvc/Employment/poverty_thresholds.dta")
merge m:1 date using "${root}/data/dvc/Employment/poverty_thresholds.dta", assert(using match) keep(match) nogenerate

*-------------------------------------------------------------------------------
* 4 - Gen quartiles
*-------------------------------------------------------------------------------

* Bucket every non-missing wage by the three thresholds; records without a
* wage keep a missing quartile.
gen quartile = .
replace quartile = 1 if !mi(wage) & wage <= poverty_100
replace quartile = 2 if !mi(wage) & wage > poverty_100 & wage <= poverty_150
replace quartile = 3 if !mi(wage) & wage > poverty_150 & wage <= poverty_250
replace quartile = 4 if !mi(wage) & wage > poverty_250

*-------------------------------------------------------------------------------
* 5 - Smooth round numbers
*-------------------------------------------------------------------------------

* Unit counter, so that weighted sums of it give weighted head counts
gen count = 1

* Weighted mass sitting exactly at each round-dollar wage, by date and
* citizenship status. Each result is parked in its own tempfile and the full
* dataset is restored before moving on to the next wage.
local round_wages 13 14 15 16 19 20 21 22 23
foreach dollar of local round_wages {
	preserve
		collapse (sum) mass_`dollar' = count if wage == `dollar' [pweight = earnwt], by(date cit_status)
		tempfile mass_`dollar'
		save `mass_`dollar''
	restore
}

* Collapse to weighted employment by date, quartile and citizenship status
collapse (sum) employment_cps = count [pweight = earnwt], by(date quartile cit_status)

* Bring the poverty thresholds back in (the collapse kept only its by-variables
* and the employment total)
project, uses("${root}/data/dvc/Employment/poverty_thresholds.dta")
merge m:1 date using "${root}/data/dvc/Employment/poverty_thresholds.dta", keep(match) nogenerate

* Attach the multipliers computed in step 1
merge m:1 date using `multiplier', keep(match) nogenerate

* Attach the mass at each round-dollar wage, one tempfile at a time
foreach dollar in 13 14 15 16 19 20 21 22 23 {
	merge m:1 date cit_status using `mass_`dollar'', nogenerate
}

* Add in multiplier * mass 
* The masses are measured per date and citizenship status, so each adjustment
* must stay specific to its own (date, cit_status) pair. Averaging it over
* cells defined by quartile and date alone would pool the three citizenship
* groups and hand every group the same average mass, so no such averaging is
* done here.
gen adjustment_100 = 0 
gen adjustment_150 = 0 

* Pick the round wage whose half-dollar window contains the threshold
foreach level in 13 14 15 16 {
	replace adjustment_100 = multiplier_100*mass_`level' if inrange(poverty_100, `level'-0.499, `level'+0.5)
}

foreach level in 19 20 21 22 23 {
	replace adjustment_150 = multiplier_150*mass_`level' if inrange(poverty_150, `level'-0.499, `level'+0.5)
}

rename (adjustment_100 adjustment_150) (adjustment_1 adjustment_2)

* A missing mass means no record of that date and citizenship group sat at the
* round wage, so there is nothing to move across the threshold. Setting the
* adjustment to zero keeps the other threshold's adjustment usable and keeps
* the transfers between neighbouring quartiles balanced.
foreach level in 1 2 {
	replace adjustment_`level' = 0 if mi(adjustment_`level')
}

* Make smoothed variable
* adjustment_1 moves mass between quartiles 1 and 2, adjustment_2 between
* quartiles 2 and 3; quartile 4 (and any remaining missing value) falls back to
* the unadjusted count.
gen emp_cps_smooth = . 
replace emp_cps_smooth = employment_cps + adjustment_1 if quartile == 1 
replace emp_cps_smooth = employment_cps + adjustment_2 - adjustment_1 if quartile == 2 
replace emp_cps_smooth = employment_cps - adjustment_2 if quartile == 3
replace emp_cps_smooth = employment_cps if mi(emp_cps_smooth)

*-------------------------------------------------------------------------------
* 6 - Norm
*-------------------------------------------------------------------------------

* Only rows with both a quartile and a citizenship status enter the series
drop if mi(quartile, cit_status)

* Base level: January 2020 smoothed employment of the same quartile and
* citizenship group, spread to every row of that group
gen jan = emp_cps_smooth if month(date) == 1 & year(date) == 2020 
bys quartile cit_status: gegen base = mean(jan) 

* Percent change relative to the January 2020 base
gen norm_emp_cps = 100 * (emp_cps_smooth / base - 1) 

drop base jan

* Sort on all three identifying keys: sorting on quartile and date alone leaves
* the citizenship groups tied, and the order of tied rows is not reproducible
* from run to run, which would make the saved file differ between builds.
sort quartile date cit_status

keep date quartile norm_emp_cps cit_status

save "${root}/data/derived/CPS/CPS by wage quartile and citizenship.dta", replace
project, creates("${root}/data/derived/CPS/CPS by wage quartile and citizenship.dta")
